In [1]:
                                #-----> STUDENT ALCOHOL  ANALYTICS <-------'''  
In [2]:
#PACKAG SECTION

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [3]:
#IMPORT DATA_FILE'S

maths_data=pd.read_csv('student_maths.csv')
por_data=pd.read_csv('student_por.csv')
In [4]:
#Rename our column name into understandable Format

maths_data=maths_data.rename(columns={'sex':'Sex','age':'Age','address':'Address','Medu':'Mother_Education','Fedu':'Father_Education','Mjob':'Mother_Job','Fjob':'Father_Job','guardian':'Guardian','famsize':'Fam_size'})
In [5]:
#Statistic Information

maths_data.describe()
Out[5]:
Age Mother_Education Father_Education traveltime studytime failures famrel freetime goout Dalc Walc health absences G1 G2 G3
count 395.000000 395.000000 395.000000 395.000000 395.000000 395.000000 395.000000 395.000000 395.000000 395.000000 395.000000 395.000000 395.000000 395.000000 395.000000 395.000000
mean 16.696203 2.749367 2.521519 1.448101 2.035443 0.334177 3.944304 3.235443 3.108861 1.481013 2.291139 3.554430 5.708861 10.908861 10.713924 10.415190
std 1.276043 1.094735 1.088201 0.697505 0.839240 0.743651 0.896659 0.998862 1.113278 0.890741 1.287897 1.390303 8.003096 3.319195 3.761505 4.581443
min 15.000000 0.000000 0.000000 1.000000 1.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 3.000000 0.000000 0.000000
25% 16.000000 2.000000 2.000000 1.000000 1.000000 0.000000 4.000000 3.000000 2.000000 1.000000 1.000000 3.000000 0.000000 8.000000 9.000000 8.000000
50% 17.000000 3.000000 2.000000 1.000000 2.000000 0.000000 4.000000 3.000000 3.000000 1.000000 2.000000 4.000000 4.000000 11.000000 11.000000 11.000000
75% 18.000000 4.000000 3.000000 2.000000 2.000000 0.000000 5.000000 4.000000 4.000000 2.000000 3.000000 5.000000 8.000000 13.000000 13.000000 14.000000
max 22.000000 4.000000 4.000000 4.000000 4.000000 3.000000 5.000000 5.000000 5.000000 5.000000 5.000000 5.000000 75.000000 19.000000 19.000000 20.000000
In [6]:
#Find the Special information about the dataset
maths_data.describe(include='O')
Out[6]:
school Sex Address Fam_size Pstatus Mother_Job Father_Job reason Guardian schoolsup famsup paid activities nursery higher internet romantic
count 395 395 395 395 395 395 395 395 395 395 395 395 395 395 395 395 395
unique 2 2 2 2 2 5 5 4 3 2 2 2 2 2 2 2 2
top GP F U GT3 T other other course mother no yes no yes yes yes yes no
freq 349 208 307 281 354 141 217 145 273 344 242 214 201 314 375 329 263
In [7]:
#Replace  Value 'F' into "Female"  &  'M' into "Male in 'Sex'..."

maths_data['Sex']=maths_data['Sex'].replace('F','Female')
maths_data['Sex']=maths_data['Sex'].replace('M','Male')
In [8]:
#Missing_value finding at Column wise:

maths_data.isnull().sum()
Out[8]:
school              0
Sex                 0
Age                 0
Address             0
Fam_size            0
Pstatus             0
Mother_Education    0
Father_Education    0
Mother_Job          0
Father_Job          0
reason              0
Guardian            0
traveltime          0
studytime           0
failures            0
schoolsup           0
famsup              0
paid                0
activities          0
nursery             0
higher              0
internet            0
romantic            0
famrel              0
freetime            0
goout               0
Dalc                0
Walc                0
health              0
absences            0
G1                  0
G2                  0
G3                  0
dtype: int64
In [9]:
# Missing_value finding in entire Dataset:

maths_data.isnull().sum().sum()
Out[9]:
0
In [10]:
#finding Duplicates in our dataset 

maths_data.duplicated().sum()

# "This dataset can't have an duplicates"
Out[10]:
0
In [11]:
maths_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 395 entries, 0 to 394
Data columns (total 33 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   school            395 non-null    object
 1   Sex               395 non-null    object
 2   Age               395 non-null    int64 
 3   Address           395 non-null    object
 4   Fam_size          395 non-null    object
 5   Pstatus           395 non-null    object
 6   Mother_Education  395 non-null    int64 
 7   Father_Education  395 non-null    int64 
 8   Mother_Job        395 non-null    object
 9   Father_Job        395 non-null    object
 10  reason            395 non-null    object
 11  Guardian          395 non-null    object
 12  traveltime        395 non-null    int64 
 13  studytime         395 non-null    int64 
 14  failures          395 non-null    int64 
 15  schoolsup         395 non-null    object
 16  famsup            395 non-null    object
 17  paid              395 non-null    object
 18  activities        395 non-null    object
 19  nursery           395 non-null    object
 20  higher            395 non-null    object
 21  internet          395 non-null    object
 22  romantic          395 non-null    object
 23  famrel            395 non-null    int64 
 24  freetime          395 non-null    int64 
 25  goout             395 non-null    int64 
 26  Dalc              395 non-null    int64 
 27  Walc              395 non-null    int64 
 28  health            395 non-null    int64 
 29  absences          395 non-null    int64 
 30  G1                395 non-null    int64 
 31  G2                395 non-null    int64 
 32  G3                395 non-null    int64 
dtypes: int64(16), object(17)
memory usage: 102.0+ KB
In [12]:
#Find the Student details based on "Living-Together Parants Status"

Get_Apart_parants=(maths_data['Pstatus']==input("Enter The Parent Status:"))
maths_data[Get_Apart_parants]
Enter The Parent Status:A
Out[12]:
school Sex Age Address Fam_size Pstatus Mother_Education Father_Education Mother_Job Father_Job ... famrel freetime goout Dalc Walc health absences G1 G2 G3
0 GP Female 18 U GT3 A 4 4 at_home teacher ... 4 3 4 1 1 3 6 5 6 6
7 GP Female 17 U GT3 A 4 4 other teacher ... 4 1 4 1 1 1 6 6 5 6
8 GP Male 15 U LE3 A 3 2 services other ... 4 2 2 1 1 1 0 16 18 19
14 GP Male 15 U GT3 A 2 2 other other ... 4 5 2 1 1 3 0 14 16 16
28 GP Male 16 U LE3 A 3 4 services other ... 5 3 3 1 1 5 4 11 11 11
37 GP Male 16 R GT3 A 4 4 other teacher ... 2 4 3 1 1 5 7 15 16 15
45 GP Female 15 U LE3 A 4 3 other other ... 5 2 2 1 1 5 8 8 8 6
46 GP Female 16 U LE3 A 3 3 other services ... 2 3 5 1 4 3 12 11 12 11
52 GP Male 15 U LE3 A 4 2 health health ... 5 5 5 3 4 5 6 11 11 10
54 GP Female 15 U LE3 A 3 3 other other ... 5 3 4 4 4 1 6 10 13 13
55 GP Female 16 U GT3 A 2 1 other other ... 5 3 4 1 1 2 8 8 9 10
56 GP Female 15 U GT3 A 4 3 services services ... 4 3 2 1 1 1 0 14 15 15
66 GP Male 15 U GT3 A 4 4 other services ... 1 3 3 5 5 3 4 13 13 12
89 GP Male 16 U LE3 A 4 4 teacher health ... 4 1 3 3 5 5 18 8 6 7
104 GP Male 15 U GT3 A 3 4 services other ... 5 4 4 1 1 1 0 16 18 18
105 GP Female 15 U GT3 A 3 3 other health ... 4 3 3 1 1 4 10 10 11 11
110 GP Male 15 U LE3 A 4 4 teacher teacher ... 5 5 3 1 1 4 6 18 19 19
126 GP Female 15 U LE3 A 3 4 other other ... 5 3 2 1 1 1 0 7 10 11
133 GP Female 16 U GT3 A 3 4 services other ... 3 2 1 1 4 5 16 12 11 11
137 GP Female 16 U GT3 A 3 3 other other ... 4 3 2 1 1 5 0 4 0 0
149 GP Male 15 U LE3 A 2 1 services other ... 4 5 5 2 5 5 0 8 9 10
188 GP Female 17 U GT3 A 3 3 health other ... 3 3 3 1 3 3 6 8 7 9
206 GP Female 16 U GT3 A 3 1 services other ... 2 3 3 2 2 4 5 7 7 7
212 GP Female 16 U GT3 A 2 2 other other ... 3 3 4 1 1 4 0 12 13 14
229 GP Female 17 U GT3 A 2 1 other other ... 3 2 3 1 2 3 10 12 10 12
241 GP Male 17 R LE3 A 4 4 teacher other ... 3 3 3 2 3 4 2 10 11 12
257 GP Male 19 U LE3 A 4 3 services at_home ... 4 3 1 1 1 1 12 11 11 11
265 GP Male 18 R LE3 A 3 4 other other ... 4 2 5 3 4 1 13 17 17 17
276 GP Female 18 R GT3 A 3 2 other services ... 4 1 1 1 1 5 75 10 9 9
280 GP Male 17 U LE3 A 4 1 services other ... 4 5 4 2 4 5 30 8 8 8
281 GP Male 17 U LE3 A 3 2 teacher services ... 4 4 4 3 4 3 19 11 9 10
289 GP Male 18 U LE3 A 4 4 teacher teacher ... 5 4 3 1 1 2 9 15 13 15
300 GP Female 18 U LE3 A 4 4 health other ... 4 2 4 1 1 4 14 12 10 11
306 GP Male 20 U GT3 A 3 2 services other ... 5 5 3 1 1 5 0 17 18 18
320 GP Female 17 U GT3 A 4 3 services services ... 5 2 2 1 2 5 23 13 13 13
336 GP Female 19 R GT3 A 3 1 services at_home ... 5 4 3 1 2 5 12 14 13 13
339 GP Female 17 R GT3 A 3 2 other other ... 4 3 3 2 3 2 4 9 10 10
343 GP Female 17 U GT3 A 2 2 at_home at_home ... 3 3 1 1 2 4 0 9 8 0
357 MS Female 17 U LE3 A 3 2 services other ... 1 2 3 1 2 5 2 12 12 11
360 MS Female 18 R LE3 A 1 4 at_home other ... 4 3 4 1 4 5 0 13 13 13
390 MS Male 20 U LE3 A 2 2 services services ... 5 5 4 4 5 4 11 9 9 9

41 rows × 33 columns

In [13]:
No_education=(maths_data['Mother_Education']==0)|(maths_data['Father_Education']==0)
maths_data[No_education]
Out[13]:
school Sex Age Address Fam_size Pstatus Mother_Education Father_Education Mother_Job Father_Job ... famrel freetime goout Dalc Walc health absences G1 G2 G3
76 GP Male 15 U GT3 T 4 0 teacher other ... 3 4 3 1 1 1 8 11 11 10
127 GP Female 19 U GT3 T 0 1 at_home other ... 3 4 2 1 1 5 2 7 8 9
171 GP Male 16 U GT3 T 1 0 other other ... 4 3 2 1 1 3 2 13 15 16
249 GP Male 16 U GT3 T 0 2 other other ... 4 3 2 2 4 5 0 13 15 15
324 GP Female 17 U LE3 T 0 2 at_home at_home ... 3 3 3 2 3 2 0 16 15 15

5 rows × 33 columns

In [14]:
#get the details who are take most absences

max_absence=maths_data['absences'].max()
max_absence_stu=maths_data.loc[maths_data['absences']==max_absence]
pd.DataFrame(max_absence_stu)
Out[14]:
school Sex Age Address Fam_size Pstatus Mother_Education Father_Education Mother_Job Father_Job ... famrel freetime goout Dalc Walc health absences G1 G2 G3
276 GP Female 18 R GT3 A 3 2 other services ... 4 1 1 1 1 5 75 10 9 9

1 rows × 33 columns

In [15]:
#female who study MAX Hrs and

max_study=(maths_data['studytime']==maths_data['studytime'].max())&((maths_data['Sex']=="Female"))
maths_data[max_study]
Out[15]:
school Sex Age Address Fam_size Pstatus Mother_Education Father_Education Mother_Job Father_Job ... famrel freetime goout Dalc Walc health absences G1 G2 G3
67 GP Female 16 U GT3 T 3 1 services other ... 4 3 3 1 2 5 4 7 7 6
69 GP Female 15 R LE3 T 3 1 other other ... 4 4 2 2 3 3 12 16 16 16
77 GP Female 16 U GT3 T 2 2 other other ... 5 2 3 1 3 3 0 11 11 11
95 GP Female 15 R GT3 T 1 1 at_home other ... 3 1 2 1 1 1 2 7 10 10
105 GP Female 15 U GT3 A 3 3 other health ... 4 3 3 1 1 4 10 10 11 11
106 GP Female 15 U GT3 T 2 2 other other ... 5 1 2 1 1 3 8 7 8 8
204 GP Female 16 R GT3 T 2 2 services services ... 5 3 5 1 1 5 6 10 10 11
210 GP Female 19 U GT3 T 3 3 other other ... 4 3 3 1 2 3 10 8 8 8
256 GP Female 17 U LE3 T 4 2 teacher services ... 4 2 3 1 1 4 6 14 12 13
259 GP Female 17 U LE3 T 2 2 services services ... 3 4 1 1 1 2 0 10 9 0
271 GP Female 18 U GT3 T 2 3 other services ... 4 5 5 1 3 2 4 15 14 14
282 GP Female 18 R LE3 T 1 1 at_home other ... 5 2 2 1 1 3 1 12 12 12
293 GP Female 17 R LE3 T 3 1 services other ... 3 1 2 1 1 3 6 18 18 18
298 GP Female 18 U GT3 T 4 3 other other ... 4 3 3 1 1 3 0 14 13 14
303 GP Female 17 U GT3 T 3 2 health health ... 5 2 2 1 2 5 0 17 17 18
334 GP Female 18 R GT3 T 2 2 at_home other ... 4 4 4 1 1 4 0 10 9 0
338 GP Female 18 U LE3 T 3 3 services services ... 5 3 3 1 1 1 7 16 15 17

17 rows × 33 columns

In [16]:
Corelation=maths_data['G3'].corr(maths_data['Dalc'])
Corelation
Out[16]:
-0.0546600405667359
In [17]:
#daliy & week alcolol consumption student  study time avarge

Groupby_Dalc=maths_data.groupby('Dalc')
for Gender,value  in Groupby_Dalc['studytime']:
    print("Work Day Alcocol Consumption:",(Gender,value.mean()))

print("\n")
Groupby_Walc=maths_data.groupby('Walc')
for Gender,value  in Groupby_Walc['studytime']:
    print("Week Day Alcocol Consumption:",(Gender,value.mean()))    
Work Day Alcocol Consumption: (1, 2.1449275362318843)
Work Day Alcocol Consumption: (2, 1.88)
Work Day Alcocol Consumption: (3, 1.5384615384615385)
Work Day Alcocol Consumption: (4, 1.7777777777777777)
Work Day Alcocol Consumption: (5, 1.6666666666666667)


Week Day Alcocol Consumption: (1, 2.2847682119205297)
Week Day Alcocol Consumption: (2, 1.9764705882352942)
Week Day Alcocol Consumption: (3, 1.9875)
Week Day Alcocol Consumption: (4, 1.6470588235294117)
Week Day Alcocol Consumption: (5, 1.7142857142857142)
In [18]:
#the code find the Gender-wise alcocol consumption in workingdays


Groupby_Walc=maths_data.groupby('Sex')
for Gender,value  in Groupby_Walc['Walc']:
    print("Work Day Alcocol Consumption:",(Gender,value.mean()))
Work Day Alcocol Consumption: ('Female', 1.9567307692307692)
Work Day Alcocol Consumption: ('Male', 2.663101604278075)
In [19]:
#Probability of Internet is availble or not based on Father& Mother Job  
from sklearn.naive_bayes import GaussianNB 
from sklearn import preprocessing

le = preprocessing.LabelEncoder()
mjob_encoded=le.fit_transform(maths_data['Mother_Job'])
fjob_encoded=le.fit_transform(maths_data['Father_Job'])
internet_encoded=le.fit_transform(maths_data['internet'])

features=np.column_stack((mjob_encoded,fjob_encoded))

model = GaussianNB()

model.fit(features,internet_encoded)

predicted= model.predict([[4,5]])
print("Predicted Value:", predicted)
Predicted Value: [1]
In [20]:
#Normalization

value=maths_data['absences']

sns.distplot(value,hist=False)
plt.show()
C:\Users\BOOO\AppData\Local\Temp\ipykernel_5456\3424027265.py:5: UserWarning: 

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `kdeplot` (an axes-level function for kernel density plots).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

  sns.distplot(value,hist=False)
In [21]:
#appply Normalization

Z_score_standarization=(value-value.mean())/value.std()
sns.distplot(Z_score_standarization,hist=False)

plt.show()
C:\Users\BOOO\AppData\Local\Temp\ipykernel_5456\418455001.py:4: UserWarning: 

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `kdeplot` (an axes-level function for kernel density plots).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

  sns.distplot(Z_score_standarization,hist=False)
In [22]:
#bOXPLOT 

sns.boxplot(y=maths_data['absences'],x=maths_data['Age'])
plt.xticks(rotation=90)
plt.show()
In [23]:
#to spend more time with family its reduce the daily alcocol consumpution

sns.lmplot(data=maths_data,x='famrel',y='Dalc',hue='Sex',scatter=None)
C:\Users\BOOO\AppData\Local\anaconda3\Lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight
  self._figure.tight_layout(*args, **kwargs)
Out[23]:
<seaborn.axisgrid.FacetGrid at 0x28e7042ced0>
In [24]:
p=maths_data.pivot_table(columns='Sex',index='Walc',aggfunc='size')
sns.heatmap(p,annot=True)
plt.legend()
plt.xlabel("Gender")
plt.ylabel("WeekDay's")

plt.show()
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
In [25]:
sns.histplot(maths_data['absences'],kde=True,color="m")
plt.show()
In [26]:
maths_data['traveltime'].value_counts().plot(marker="o")
maths_data['studytime'].value_counts().plot(marker="D")
plt.legend()
plt.show()
In [27]:
import plotly.express as p

x=maths_data['G3']
fi=p.scatter_3d(maths_data,x='G3',y="Dalc",z='Walc',color=x,title="Final Grade depend on Daily & Weekly Alcohol intake")
fi.update_layout(showlegend=True)
fi.show()
In [28]:
g = sns.PairGrid(data=maths_data,hue='Sex')
g.map_diag(sns.histplot) # diagonal
g.map_upper(sns.kdeplot) # upper 
g.map_lower(sns.scatterplot) # lo
Out[28]:
<seaborn.axisgrid.PairGrid at 0x28e78432490>
In [29]:
sns.jointplot(data=maths_data,x='health',y='Age',kind='hist',hue='Dalc' )

plt.grid()
plt.suptitle("Health radio Age wise")
plt.show()
In [30]:
import plotly.express as px

cal=sns.countplot(data=maths_data,x='higher',hue='Walc',palette="Set1",linewidth=1)

for p in cal.patches:
    po='{:.1f}%'.format(100*p.get_height()/395)
    x=p.get_x()+p.get_width()
    y=p.get_height()
    
    cal.annotate(po,(x,y),ha='center',color='k')

plt.show()
In [31]:
import plotly.express as px

cal=sns.countplot(data=maths_data,x='higher',hue='Dalc',palette="Set1",linewidth=1)

for p in cal.patches:
    po='{:.1f}%'.format(100*p.get_height()/395)
    x=p.get_x()+p.get_width()
    y=p.get_height()
    
    cal.annotate(po,(x,y),ha='center',color='k')

plt.show()
In [32]:
#counts of job
maths_data['Mother_Job'].value_counts().plot.pie(autopct='%1.2f%%')
plt.legend()
plt.title("Mother Job")
plt.show()

maths_data['Father_Job'].value_counts().plot.pie(autopct='%1.2f%%')
plt.legend()
plt.title("Father Job")
plt.show()
In [33]:
Get_Activities=(maths_data['activities']==input("Enter Extra Activities:"))
maths_data[Get_Activities]
Enter Extra Activities:yes
Out[33]:
school Sex Age Address Fam_size Pstatus Mother_Education Father_Education Mother_Job Father_Job ... famrel freetime goout Dalc Walc health absences G1 G2 G3
3 GP Female 15 U GT3 T 4 2 health services ... 3 2 2 1 1 5 2 15 14 15
5 GP Male 16 U LE3 T 4 3 services other ... 5 4 2 1 2 5 10 15 15 15
9 GP Male 15 U GT3 T 3 4 other other ... 5 5 1 1 1 5 0 14 15 15
11 GP Female 15 U GT3 T 2 1 services other ... 5 2 2 1 1 4 4 10 12 12
12 GP Male 15 U LE3 T 4 4 health services ... 4 3 3 1 3 5 2 14 14 14
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
381 MS Male 18 R GT3 T 2 1 other other ... 4 4 3 1 3 5 5 7 6 7
382 MS Male 17 U GT3 T 2 3 other services ... 4 4 3 1 1 3 2 11 11 10
386 MS Female 18 R GT3 T 4 4 teacher at_home ... 4 4 3 2 2 5 7 6 5 6
387 MS Female 19 R GT3 T 2 3 services other ... 5 4 2 1 2 5 0 7 5 0
389 MS Female 18 U GT3 T 1 1 other other ... 1 1 1 1 1 5 0 6 5 0

201 rows × 33 columns

In [34]:
maths_data[Get_Activities]['Dalc'].value_counts().plot.pie(autopct='%1.2f%%')
plt.title("School Support for who intake Alcohol Daily")
plt.show()
In [35]:
dalc=maths_data['Dalc'].value_counts()
walc=maths_data['Walc'].value_counts()
plt.plot(dalc,':',color='red',marker='s')
plt.plot(walc,'-.',color='m',marker='o')
plt.legend('DW')
plt.xlabel('Level Of alcohol intake')
plt.ylabel('Number of students')
plt.title("Student Count's ")
plt.show()
In [ ]:
 
In [ ]:
 
In [ ]: